us <- read.csv("https://raw.githubusercontent.com/JaclynCoate/6373_Time_Series/master/TermProject/Data/USdaily.7.26.20.csv", header = T, strip.white = T)
us <- transform(us, date = as.Date(as.character(date), "%Y%m%d"))
us <- subset(us, select = -c(states, dateChecked, hospitalized, lastModified, total, posNeg, totalTestResultsIncrease, hash))
us[is.na(us)] <- 0
us = us[order(as.Date(us$date, format = "%Y%m%d")),]
head(us)
## date positive negative pending hospitalizedCurrently
## 187 2020-01-22 2 0 0 0
## 186 2020-01-23 2 0 0 0
## 185 2020-01-24 2 0 0 0
## 184 2020-01-25 2 0 0 0
## 183 2020-01-26 2 0 0 0
## 182 2020-01-27 2 0 0 0
## hospitalizedCumulative inIcuCurrently inIcuCumulative
## 187 0 0 0
## 186 0 0 0
## 185 0 0 0
## 184 0 0 0
## 183 0 0 0
## 182 0 0 0
## onVentilatorCurrently onVentilatorCumulative recovered death
## 187 0 0 0 0
## 186 0 0 0 0
## 185 0 0 0 0
## 184 0 0 0 0
## 183 0 0 0 0
## 182 0 0 0 0
## totalTestResults deathIncrease hospitalizedIncrease negativeIncrease
## 187 2 0 0 0
## 186 2 0 0 0
## 185 2 0 0 0
## 184 2 0 0 0
## 183 2 0 0 0
## 182 2 0 0 0
## positiveIncrease
## 187 0
## 186 0
## 185 0
## 184 0
## 183 0
## 182 0
It is difficult to assume stationarity for this data due to multiple factors. We are working under the assumption that COVID is a novel virus and cases as well as hospitalizations will eventually return to zero. This being said our current modeling techniques do things such as return to the median or mimic the previously seen trends. Also, we see a heavy wandering trend in both new cases and hospitalization would be dependent on this as well as time. We will review the data and see what, if any, non-stationary components reveal themselves and model the data accordingly.
Traits:
ggplot(data = us, aes(x=date, y=hospitalizedCurrently))+
geom_line(color="orange")+
labs(title = "Current COVID Hospitalized Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
When reviewing the variables and the scatter plot from our previous EDA we can see that there are some correlations that were expected, for example currentHospitalizations is correlated with the variables that refelct ICU and Ventilaor patients. These metrics wouldn’t exist without hospitalizations. These variables also cannot help up predict hospitalizations because they after occurances. So we will actually be leveraing variables such as positive increase in order to see if there is some sort of correlation between hospitalized patients and the number of positive cases.
ggplot(data = us, aes(x=date, y=positiveIncrease))+
geom_line(color="orange")+
labs(title = "Increase in COVID Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
Since we are working with a seasonal and transformation component but it requires an additional transformation for the total positive COVID cases to become stationary for evaluation, we will only use positive increase of cases to predict currently hospitzliaed cases for the VAR model.
#Positive Cases Transformations
#pos.diff = artrans.wge(us$positive, phi.tr = 1, lag.max = 100)
#pos.diff.2 = artrans.wge(pos.diff, phi.tr = 1, lag.max = 100)
#pos.trans = artrans.wge(pos.diff.2,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#Increase Positive Transformation
inpos.diff = artrans.wge(us$positiveIncrease, phi.tr = 1, lag.max = 100)
inpos.trans = artrans.wge(inpos.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#Current Hospitalization Transformations
us.diff = artrans.wge(us$hospitalizedCurrently, phi.tr = 1, lag.max = 100)
currhosp.trans = artrans.wge(us.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#VARselect to choose lag
VARselect(cbind(currhosp.trans, inpos.trans), lag.max = 10, type= "both")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 7 7 2 7
##
## $criteria
## 1 2 3 4 5
## AIC(n) 3.093857e+01 3.083329e+01 3.079091e+01 3.076976e+01 3.069554e+01
## HQ(n) 3.101650e+01 3.095018e+01 3.094676e+01 3.096458e+01 3.092932e+01
## SC(n) 3.113058e+01 3.112131e+01 3.117493e+01 3.124979e+01 3.127158e+01
## FPE(n) 2.731960e+13 2.459304e+13 2.357880e+13 2.309552e+13 2.145770e+13
## 6 7 8 9 10
## AIC(n) 3.066656e+01 3.051427e+01 3.053424e+01 3.059674e+01 3.059649e+01
## HQ(n) 3.093930e+01 3.082598e+01 3.088492e+01 3.098638e+01 3.102509e+01
## SC(n) 3.133860e+01 3.128233e+01 3.139830e+01 3.155681e+01 3.165257e+01
## FPE(n) 2.086403e+13 1.793906e+13 1.833019e+13 1.955149e+13 1.959499e+13
#fit model
usdiff.var.fit = VAR(cbind(currhosp.trans, inpos.trans), type = "both", p = 2)
#7 day predictions
pred.var7 = predict(usdiff.var.fit, n.ahead = 7)
pred.var7$fcst$currhosp.trans
## fcst lower upper CI
## [1,] -42.12350 -2942.410 2858.163 2900.286
## [2,] -385.17371 -3346.493 2576.145 2961.319
## [3,] -80.65766 -3262.529 3101.214 3181.871
## [4,] -124.14161 -3327.324 3079.041 3203.182
## [5,] -46.62794 -3284.538 3191.282 3237.910
## [6,] -43.58380 -3288.207 3201.039 3244.623
## [7,] -11.23201 -3262.277 3239.813 3251.045
b. 90 Day
#90 day predictions
pred.var90 = predict(usdiff.var.fit, n.ahead = 90)
pred.var90$fcst$currhosp.trans
## fcst lower upper CI
## [1,] -42.123501 -2942.410 2858.163 2900.286
## [2,] -385.173707 -3346.493 2576.145 2961.319
## [3,] -80.657665 -3262.529 3101.214 3181.871
## [4,] -124.141611 -3327.324 3079.041 3203.182
## [5,] -46.627942 -3284.538 3191.282 3237.910
## [6,] -43.583795 -3288.207 3201.039 3244.623
## [7,] -11.232008 -3262.277 3239.813 3251.045
## [8,] -6.349200 -3259.226 3246.528 3252.877
## [9,] 7.305513 -3246.870 3261.481 3254.175
## [10,] 12.421029 -3242.210 3267.053 3254.632
## [11,] 18.773716 -3236.131 3273.679 3254.905
## [12,] 22.480950 -3232.533 3277.495 3255.014
## [13,] 26.203504 -3228.870 3281.277 3255.073
## [14,] 28.923414 -3226.175 3284.022 3255.099
## [15,] 31.503265 -3223.608 3286.615 3255.112
## [16,] 33.699320 -3221.418 3288.817 3255.117
## [17,] 35.776726 -3219.344 3290.897 3255.120
## [18,] 37.697587 -3217.424 3292.819 3255.122
## [19,] 39.549962 -3215.572 3294.672 3255.122
## [20,] 41.334045 -3213.789 3296.457 3255.123
## [21,] 43.082251 -3212.041 3298.205 3255.123
## [22,] 44.799892 -3210.323 3299.923 3255.123
## [23,] 46.499486 -3208.623 3301.622 3255.123
## [24,] 48.185069 -3206.938 3303.308 3255.123
## [25,] 49.861824 -3205.261 3304.985 3255.123
## [26,] 51.532055 -3203.591 3306.655 3255.123
## [27,] 53.198022 -3201.925 3308.321 3255.123
## [28,] 54.860928 -3200.262 3309.984 3255.123
## [29,] 56.521787 -3198.601 3311.645 3255.123
## [30,] 58.181203 -3196.942 3313.304 3255.123
## [31,] 59.839642 -3195.283 3314.963 3255.123
## [32,] 61.497398 -3193.626 3316.620 3255.123
## [33,] 63.154688 -3191.968 3318.278 3255.123
## [34,] 64.811654 -3190.311 3319.935 3255.123
## [35,] 66.468399 -3188.655 3321.591 3255.123
## [36,] 68.124990 -3186.998 3323.248 3255.123
## [37,] 69.781476 -3185.341 3324.904 3255.123
## [38,] 71.437889 -3183.685 3326.561 3255.123
## [39,] 73.094252 -3182.029 3328.217 3255.123
## [40,] 74.750580 -3180.372 3329.874 3255.123
## [41,] 76.406884 -3178.716 3331.530 3255.123
## [42,] 78.063172 -3177.060 3333.186 3255.123
## [43,] 79.719449 -3175.404 3334.842 3255.123
## [44,] 81.375717 -3173.747 3336.499 3255.123
## [45,] 83.031981 -3172.091 3338.155 3255.123
## [46,] 84.688240 -3170.435 3339.811 3255.123
## [47,] 86.344498 -3168.778 3341.467 3255.123
## [48,] 88.000753 -3167.122 3343.124 3255.123
## [49,] 89.657007 -3165.466 3344.780 3255.123
## [50,] 91.313260 -3163.810 3346.436 3255.123
## [51,] 92.969513 -3162.153 3348.092 3255.123
## [52,] 94.625766 -3160.497 3349.749 3255.123
## [53,] 96.282018 -3158.841 3351.405 3255.123
## [54,] 97.938270 -3157.185 3353.061 3255.123
## [55,] 99.594521 -3155.528 3354.717 3255.123
## [56,] 101.250773 -3153.872 3356.374 3255.123
## [57,] 102.907025 -3152.216 3358.030 3255.123
## [58,] 104.563276 -3150.560 3359.686 3255.123
## [59,] 106.219528 -3148.903 3361.342 3255.123
## [60,] 107.875779 -3147.247 3362.999 3255.123
## [61,] 109.532031 -3145.591 3364.655 3255.123
## [62,] 111.188282 -3143.935 3366.311 3255.123
## [63,] 112.844534 -3142.278 3367.967 3255.123
## [64,] 114.500785 -3140.622 3369.624 3255.123
## [65,] 116.157037 -3138.966 3371.280 3255.123
## [66,] 117.813288 -3137.310 3372.936 3255.123
## [67,] 119.469540 -3135.653 3374.592 3255.123
## [68,] 121.125791 -3133.997 3376.249 3255.123
## [69,] 122.782043 -3132.341 3377.905 3255.123
## [70,] 124.438294 -3130.685 3379.561 3255.123
## [71,] 126.094546 -3129.028 3381.218 3255.123
## [72,] 127.750797 -3127.372 3382.874 3255.123
## [73,] 129.407049 -3125.716 3384.530 3255.123
## [74,] 131.063300 -3124.060 3386.186 3255.123
## [75,] 132.719552 -3122.403 3387.843 3255.123
## [76,] 134.375803 -3120.747 3389.499 3255.123
## [77,] 136.032055 -3119.091 3391.155 3255.123
## [78,] 137.688306 -3117.435 3392.811 3255.123
## [79,] 139.344558 -3115.778 3394.468 3255.123
## [80,] 141.000809 -3114.122 3396.124 3255.123
## [81,] 142.657061 -3112.466 3397.780 3255.123
## [82,] 144.313312 -3110.810 3399.436 3255.123
## [83,] 145.969564 -3109.153 3401.093 3255.123
## [84,] 147.625815 -3107.497 3402.749 3255.123
## [85,] 149.282067 -3105.841 3404.405 3255.123
## [86,] 150.938318 -3104.185 3406.061 3255.123
## [87,] 152.594570 -3102.528 3407.718 3255.123
## [88,] 154.250821 -3100.872 3409.374 3255.123
## [89,] 155.907073 -3099.216 3411.030 3255.123
## [90,] 157.563324 -3097.560 3412.686 3255.123
startingPoints7 = us$hospitalizedCurrently[126:132]
currHospForecasts7 = pred.var7$fcst$currhosp.trans[,1:3] + startingPoints7
b. 90 Day
startingPoints90 = us$hospitalizedCurrently[43:132]
currHospForecasts90 = pred.var90$fcst$currhosp.trans[,1:3] + startingPoints90
#7 day Forecasts
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,139), ylab = "Currently Hospitalized COVID Patients", main = "7 Day Currently Hospitalized Patients Forecast")
lines(seq(133,139,1), as.data.frame(currHospForecasts7)$fcst, type = "l", col = "red")
b. 90 Day
#90 day Forecasts
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,222), ylab = "Currently Hospitalized COVID Patients", main = "90 Day Currently Hospitalized Patients Forecast")
lines(seq(133,222,1), as.data.frame(currHospForecasts90)$fcst, type = "l", col = "red")
#varASE = mean((us$hospitalizedCurrently[123:132]-pred.var$fcst$y1[1:10])^2)
#varASE
Since we have been looking at additional regressors for all our above models and know that the best regressor to leverage will be positive increase of COVID cases. This regressor reflects similar behavior to that of current hospitlizations and can be model properly against hospitalizations. 1. Create Current Hospitalized ts variable
tUScurrhop = ts(us$hospitalizedCurrently)
tUSx = data.frame(positiveIncrease = ts(us$positiveIncrease))
fit.mlp.new = mlp(ts(tUSx), reps = 50, comb = "mean")
mlp.new.fore7 = forecast(fit.mlp.new, h = 7)
mlp.new.fore7
## Point Forecast
## 133 60022.23
## 134 63703.79
## 135 65465.01
## 136 66514.70
## 137 65662.89
## 138 65030.57
## 139 64549.13
b. 90 Day Forecast for Regressor
mlp.new.fore90 = forecast(fit.mlp.new, h = 90)
mlp.new.fore90
## Point Forecast
## 133 60022.23
## 134 63703.79
## 135 65465.01
## 136 66514.70
## 137 65662.89
## 138 65030.57
## 139 64549.13
## 140 64841.09
## 141 65153.74
## 142 65416.89
## 143 65407.32
## 144 65312.68
## 145 65086.79
## 146 65086.33
## 147 65141.91
## 148 65285.80
## 149 65337.72
## 150 65339.22
## 151 65203.62
## 152 65132.75
## 153 65136.06
## 154 65237.91
## 155 65310.28
## 156 65340.96
## 157 65272.21
## 158 65162.37
## 159 65138.63
## 160 65197.44
## 161 65281.46
## 162 65327.43
## 163 65312.92
## 164 65202.48
## 165 65154.10
## 166 65163.65
## 167 65246.95
## 168 65304.72
## 169 65326.67
## 170 65256.25
## 171 65172.70
## 172 65153.24
## 173 65212.76
## 174 65280.81
## 175 65320.37
## 176 65297.83
## 177 65197.72
## 178 65161.98
## 179 65180.20
## 180 65253.64
## 181 65302.96
## 182 65317.58
## 183 65239.50
## 184 65176.04
## 185 65161.42
## 186 65224.66
## 187 65283.02
## 188 65317.72
## 189 65283.32
## 190 65192.38
## 191 65163.94
## 192 65194.32
## 193 65261.09
## 194 65305.04
## 195 65309.56
## 196 65223.43
## 197 65175.53
## 198 65169.22
## 199 65235.08
## 200 65287.03
## 201 65316.29
## 202 65267.72
## 203 65188.04
## 204 65164.37
## 205 65207.13
## 206 65268.25
## 207 65308.30
## 208 65300.29
## 209 65210.14
## 210 65173.17
## 211 65178.87
## 212 65244.87
## 213 65292.20
## 214 65314.14
## 215 65250.46
## 216 65184.51
## 217 65165.45
## 218 65218.85
## 219 65274.88
## 220 65311.36
## 221 65288.90
## 222 65200.09
new.regressor7 <- data.frame(c(us$positiveIncrease, mlp.new.fore7$mean))
new.regressor7
## c.us.positiveIncrease..mlp.new.fore7.mean.
## 1 3613.00
## 2 3171.00
## 3 4671.00
## 4 6255.00
## 5 6885.00
## 6 9259.00
## 7 11442.00
## 8 10632.00
## 9 12873.00
## 10 17656.00
## 11 19044.00
## 12 19724.00
## 13 19655.00
## 14 21897.00
## 15 24694.00
## 16 25773.00
## 17 27992.00
## 18 31945.00
## 19 33252.00
## 20 25550.00
## 21 28937.00
## 22 30737.00
## 23 30534.00
## 24 34419.00
## 25 34351.00
## 26 30561.00
## 27 27844.00
## 28 25133.00
## 29 25631.00
## 30 30298.00
## 31 30923.00
## 32 31964.00
## 33 27963.00
## 34 27468.00
## 35 25872.00
## 36 26333.00
## 37 28916.00
## 38 31784.00
## 39 34174.00
## 40 35901.00
## 41 27380.00
## 42 22605.00
## 43 25219.00
## 44 26641.00
## 45 29568.00
## 46 33056.00
## 47 29185.00
## 48 25767.00
## 49 22523.00
## 50 22449.00
## 51 25056.00
## 52 27490.00
## 53 27605.00
## 54 24810.00
## 55 21504.00
## 56 18236.00
## 57 22663.00
## 58 21193.00
## 59 26705.00
## 60 24710.00
## 61 24701.00
## 62 20171.00
## 63 20992.00
## 64 20853.00
## 65 21319.00
## 66 26513.00
## 67 24555.00
## 68 21590.00
## 69 20105.00
## 70 18798.00
## 71 16629.00
## 72 19385.00
## 73 22601.00
## 74 23522.00
## 75 23762.00
## 76 21639.00
## 77 20415.00
## 78 19982.00
## 79 20312.00
## 80 20839.00
## 81 23352.00
## 82 23106.00
## 83 18823.00
## 84 17012.00
## 85 17175.00
## 86 20803.00
## 87 22032.00
## 88 23477.00
## 89 25341.00
## 90 21373.00
## 91 18510.00
## 92 23435.00
## 93 23873.00
## 94 27527.00
## 95 31046.00
## 96 31994.00
## 97 27284.00
## 98 27017.00
## 99 33021.00
## 100 38684.00
## 101 39072.00
## 102 44421.00
## 103 43783.00
## 104 41857.00
## 105 39175.00
## 106 47462.00
## 107 50674.00
## 108 53826.00
## 109 54223.00
## 110 54734.00
## 111 45789.00
## 112 41600.00
## 113 51766.00
## 114 62147.00
## 115 58836.00
## 116 66645.00
## 117 63007.00
## 118 60978.00
## 119 58465.00
## 120 62879.00
## 121 65382.00
## 122 70953.00
## 123 77233.00
## 124 65180.00
## 125 64884.00
## 126 56971.00
## 127 63642.00
## 128 69150.00
## 129 71027.00
## 130 75193.00
## 131 65413.00
## 132 61713.00
## 133 60022.23
## 134 63703.79
## 135 65465.01
## 136 66514.70
## 137 65662.89
## 138 65030.57
## 139 64549.13
b. 90 day regressor var
new.regressor90 <- data.frame(c(us$positiveIncrease, mlp.new.fore90$mean))
new.regressor90
## c.us.positiveIncrease..mlp.new.fore90.mean.
## 1 3613.00
## 2 3171.00
## 3 4671.00
## 4 6255.00
## 5 6885.00
## 6 9259.00
## 7 11442.00
## 8 10632.00
## 9 12873.00
## 10 17656.00
## 11 19044.00
## 12 19724.00
## 13 19655.00
## 14 21897.00
## 15 24694.00
## 16 25773.00
## 17 27992.00
## 18 31945.00
## 19 33252.00
## 20 25550.00
## 21 28937.00
## 22 30737.00
## 23 30534.00
## 24 34419.00
## 25 34351.00
## 26 30561.00
## 27 27844.00
## 28 25133.00
## 29 25631.00
## 30 30298.00
## 31 30923.00
## 32 31964.00
## 33 27963.00
## 34 27468.00
## 35 25872.00
## 36 26333.00
## 37 28916.00
## 38 31784.00
## 39 34174.00
## 40 35901.00
## 41 27380.00
## 42 22605.00
## 43 25219.00
## 44 26641.00
## 45 29568.00
## 46 33056.00
## 47 29185.00
## 48 25767.00
## 49 22523.00
## 50 22449.00
## 51 25056.00
## 52 27490.00
## 53 27605.00
## 54 24810.00
## 55 21504.00
## 56 18236.00
## 57 22663.00
## 58 21193.00
## 59 26705.00
## 60 24710.00
## 61 24701.00
## 62 20171.00
## 63 20992.00
## 64 20853.00
## 65 21319.00
## 66 26513.00
## 67 24555.00
## 68 21590.00
## 69 20105.00
## 70 18798.00
## 71 16629.00
## 72 19385.00
## 73 22601.00
## 74 23522.00
## 75 23762.00
## 76 21639.00
## 77 20415.00
## 78 19982.00
## 79 20312.00
## 80 20839.00
## 81 23352.00
## 82 23106.00
## 83 18823.00
## 84 17012.00
## 85 17175.00
## 86 20803.00
## 87 22032.00
## 88 23477.00
## 89 25341.00
## 90 21373.00
## 91 18510.00
## 92 23435.00
## 93 23873.00
## 94 27527.00
## 95 31046.00
## 96 31994.00
## 97 27284.00
## 98 27017.00
## 99 33021.00
## 100 38684.00
## 101 39072.00
## 102 44421.00
## 103 43783.00
## 104 41857.00
## 105 39175.00
## 106 47462.00
## 107 50674.00
## 108 53826.00
## 109 54223.00
## 110 54734.00
## 111 45789.00
## 112 41600.00
## 113 51766.00
## 114 62147.00
## 115 58836.00
## 116 66645.00
## 117 63007.00
## 118 60978.00
## 119 58465.00
## 120 62879.00
## 121 65382.00
## 122 70953.00
## 123 77233.00
## 124 65180.00
## 125 64884.00
## 126 56971.00
## 127 63642.00
## 128 69150.00
## 129 71027.00
## 130 75193.00
## 131 65413.00
## 132 61713.00
## 133 60022.23
## 134 63703.79
## 135 65465.01
## 136 66514.70
## 137 65662.89
## 138 65030.57
## 139 64549.13
## 140 64841.09
## 141 65153.74
## 142 65416.89
## 143 65407.32
## 144 65312.68
## 145 65086.79
## 146 65086.33
## 147 65141.91
## 148 65285.80
## 149 65337.72
## 150 65339.22
## 151 65203.62
## 152 65132.75
## 153 65136.06
## 154 65237.91
## 155 65310.28
## 156 65340.96
## 157 65272.21
## 158 65162.37
## 159 65138.63
## 160 65197.44
## 161 65281.46
## 162 65327.43
## 163 65312.92
## 164 65202.48
## 165 65154.10
## 166 65163.65
## 167 65246.95
## 168 65304.72
## 169 65326.67
## 170 65256.25
## 171 65172.70
## 172 65153.24
## 173 65212.76
## 174 65280.81
## 175 65320.37
## 176 65297.83
## 177 65197.72
## 178 65161.98
## 179 65180.20
## 180 65253.64
## 181 65302.96
## 182 65317.58
## 183 65239.50
## 184 65176.04
## 185 65161.42
## 186 65224.66
## 187 65283.02
## 188 65317.72
## 189 65283.32
## 190 65192.38
## 191 65163.94
## 192 65194.32
## 193 65261.09
## 194 65305.04
## 195 65309.56
## 196 65223.43
## 197 65175.53
## 198 65169.22
## 199 65235.08
## 200 65287.03
## 201 65316.29
## 202 65267.72
## 203 65188.04
## 204 65164.37
## 205 65207.13
## 206 65268.25
## 207 65308.30
## 208 65300.29
## 209 65210.14
## 210 65173.17
## 211 65178.87
## 212 65244.87
## 213 65292.20
## 214 65314.14
## 215 65250.46
## 216 65184.51
## 217 65165.45
## 218 65218.85
## 219 65274.88
## 220 65311.36
## 221 65288.90
## 222 65200.09
mlp.fit1 = mlp(tUScurrhop, xreg = tUSx, outplot = T, comb = "mean")
plot(mlp.fit1)
currhosp.fore7 = forecast(mlp.fit1, h = 7, xreg = new.regressor7)
plot(currhosp.fore7)
b. Currently Hospitalized 90 Day Forecast w/ Positive Increaser Regressor
currhosp.fore90 = forecast(mlp.fit1, h = 90, xreg = new.regressor90)
plot(currhosp.fore90)
ASEmlr = mean((us$hospitalizedCurrently[126:132]-currhosp.fore7$mean)^2)
ASEmlr
## [1] 12199114
We completed a default neural network model. With so many opportunities for how to actually tune neural network model we knew this would not be our best model in this case. So we moved forward with a hypertuned neural network model that allows us to calculate many windowed ASEs and compare those model against eachother.
We have leveraged the tswgewrapper code above to produce a hyperparameter tuned NN model for our ensemble model. This model will work as our higher functioning ensemble model. Below are the steps to surface the best hyperparameters based on our grid search
#if (!require(devtools)) {
# install.packages("devtools")
#}
#devtools::install_github("josephsdavid/tswgewrapped", build_vignettes = TRUE)
library(tswgewrapped)
data_train.m <- data.frame(hospitalizedCurrently = us$hospitalizedCurrently[1:122], positiveIncrease = us$positiveIncrease[1:122])
data_test.m <- data.frame(hospitalizedCurrently = us$hospitalizedCurrently[123:132], positiveIncrease = us$positiveIncrease[123:132])
# search for best NN hyperparameters in given grid
model.m = tswgewrapped::ModelBuildNNforCaret$new(data = data_train.m, var_interest = "hospitalizedCurrently",
search = 'random', tuneLength = 5, parallel = TRUE,
batch_size = 50, h = 7, m = 7,
verbose = 1)
## Registered S3 method overwritten by 'lava':
## method from
## print.pcor greybox
## Aggregating results
## Selecting tuning parameters
## Fitting reps = 10, hd = 2, allow.det.season = TRUE on full training set
## - Total Time for training: : 52.316 sec elapsed
res.m <- model.m$summarize_hyperparam_results()
res.m
## reps hd allow.det.season RMSE ASE RMSESD ASESD
## 1 10 2 TRUE 3103.429 13970241 2184.689 16296534
## 2 11 3 FALSE 3129.341 14942705 2380.109 19673017
## 3 16 1 FALSE 3559.638 16480775 2047.127 15988977
## 4 16 3 TRUE 3204.752 15391189 2373.358 19509361
## 5 22 3 FALSE 3268.191 16196849 2463.200 19979406
model.m$plot_hyperparam_results()
best.m <- model.m$summarize_best_hyperparams()
best.m
## reps hd allow.det.season
## 1 10 2 TRUE
final.ase.m <- dplyr::filter(res.m, reps == best.m$reps &
hd == best.m$hd &
allow.det.season == best.m$allow.det.season)[['ASE']]
final.ase.m
## [1] 13970241
# Final Model
caret_model.m = model.m$get_final_models(subset = 'a')
caret_model.m$finalModel
## MLP fit with 2 hidden nodes and 10 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,3)
## Forecast combined using the median operator.
## MSE: 1065689.5459.
# Plot Final Model
plot(caret_model.m$finalModel)
#Ensemble model
ensemble.mlp = mlp(tUScurrhop, xreg = tUSx, outplot = T, reps = 10, hd = 2, allow.det.season = T)
ensemble.mlp
## MLP fit with 2 hidden nodes and 10 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,3)
## Forecast combined using the median operator.
## MSE: 1025155.9706.
#Plot ensemble model
plot(ensemble.mlp)
fore7.m = forecast(ensemble.mlp, xreg = new.regressor7, h=7)
plot(fore7.m)
fore90.m = forecast(ensemble.mlp, xreg = new.regressor90, h=90)
plot(fore90.m)